Divergence Point Analysis on Eye-tracking Data

This R script generates timecourse plots of mean fixation proportion, and performs a statistical analysis to determine divergence points between fixations on different objects. It is adapted from the R script in the paper “Analysing data from the psycholinguistic visual‑world paradigm: Comparison of different analysis methods”, Ito & Knoeferle (2023).

The columns in the original eye-tracking data set are:

1. Data preparation

rm(list = ls()) # Clear the workspace
options(scipen = 999) # Turn off scientific notation
Packages
require(Rmisc)
require(tidyverse)
require(ggplot2)
require(boot)
require(dplyr)
require(lme4)
Package versions
##      lme4    Matrix      boot lubridate   forcats   stringr     dplyr     purrr 
##  "1.1-37"   "1.7-0"  "1.3-30"   "1.9.4"   "1.0.0"   "1.5.1"   "1.1.4"   "1.0.4" 
##     readr     tidyr    tibble   ggplot2 tidyverse     Rmisc      plyr   lattice 
##   "2.1.5"   "1.3.1"   "3.2.1"   "3.5.2"   "2.0.0"   "1.5.1"   "1.8.9"  "0.22-6"
Read the time-course output data of the eyetracker from file
fix.dat = read_delim("/Users/cindyzhang/Desktop/Spanish Gender Project/Spanish Gender Project/data-2025-03-12/df_cleaned.csv", delim=',')
# all the trackloss bins have been removed - in any given row, there is a fixation in one of the 3 AreaFixated 
Inspect the raw data
summary(fix.dat)
##       ...1            TOI            Participant          Timeline        
##  Min.   :     1   Length:732647      Length:732647      Length:732647     
##  1st Qu.:183162   Class :character   Class :character   Class :character  
##  Median :366324   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :366324                                                           
##  3rd Qu.:549486                                                           
##  Max.   :732647                                                           
##                                                                           
##       Bin         Bin_duration      Competitor       Distractor    
##  Min.   :  1.0   Min.   : 1.000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:181.0   1st Qu.:10.000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :342.0   Median :10.000   Median :0.0000   Median :0.0000  
##  Mean   :338.2   Mean   : 9.991   Mean   :0.2076   Mean   :0.1891  
##  3rd Qu.:499.0   3rd Qu.:10.000   3rd Qu.:0.0000   3rd Qu.:0.0000  
##  Max.   :729.0   Max.   :10.000   Max.   :1.0000   Max.   :1.0000  
##                                                                    
##      Target           TimeMS      Animacy_C          Animacy_D        
##  Min.   :0.0000   Min.   :  10   Length:732647      Length:732647     
##  1st Qu.:0.0000   1st Qu.:1810   Class :character   Class :character  
##  Median :1.0000   Median :3420   Mode  :character   Mode  :character  
##  Mean   :0.6033   Mean   :3381                                        
##  3rd Qu.:1.0000   3rd Qu.:4990                                        
##  Max.   :1.0000   Max.   :7283                                        
##                                                                       
##   Animacy_T          Determiner         N_ending_C         N_ending_D       
##  Length:732647      Length:732647      Length:732647      Length:732647     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##   N_ending_T         N_Gender_C         N_Gender_D         N_Gender_T       
##  Length:732647      Length:732647      Length:732647      Length:732647     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##  Phonol_Overlap_C   Phonol_Overlap_D Phonol_Overlap_T    ArticleOnset 
##  Length:732647      Mode:logical     Length:732647      Min.   :2403  
##  Class :character   NA's:732647      Class :character   1st Qu.:2448  
##  Mode  :character                    Mode  :character   Median :2479  
##                                                         Mean   :2490  
##                                                         3rd Qu.:2524  
##                                                         Max.   :2679  
##                                                                       
##    NounOnset     EndingOnset    EndingOffset    Remaining     SpeakerType  
##  Min.   :2496   Min.   :2591   Min.   :2838   Min.   :3005   Min.   :1.00  
##  1st Qu.:2576   1st Qu.:2762   1st Qu.:3020   1st Qu.:3144   1st Qu.:1.00  
##  Median :2613   Median :2859   Median :3079   Median :3230   Median :2.00  
##  Mean   :2619   Mean   :2877   Mean   :3107   Mean   :3239   Mean   :1.54  
##  3rd Qu.:2669   3rd Qu.:2972   3rd Qu.:3196   3rd Qu.:3315   3rd Qu.:2.00  
##  Max.   :2807   Max.   :3411   Max.   :3546   Max.   :3656   Max.   :2.00  
##                                                                            
##      Sex                 Age           Position         Trackloss      
##  Length:732647      Min.   :19.00    Length:732647      Mode :logical  
##  Class :character   1st Qu.:20.00    Class :character   FALSE:732647   
##  Mode  :character   Median :23.00    Mode  :character                  
##                     Mean   :23.75                                      
##                     3rd Qu.:25.00                                      
##                     Max.   :50.00                                      
##                     NA's   :142383                                     
##  TimeMS_adjusted   ArticleOnset_adjusted NounOnset_adjusted
##  Min.   :-2670.0   Min.   :0             Min.   : 70.0     
##  1st Qu.: -690.0   1st Qu.:0             1st Qu.:113.0     
##  Median :  930.0   Median :0             Median :128.0     
##  Mean   :  886.2   Mean   :0             Mean   :128.3     
##  3rd Qu.: 2490.0   3rd Qu.:0             3rd Qu.:144.0     
##  Max.   : 4750.0   Max.   :0             Max.   :202.0     
##                                                            
##  EndingOnset_adjusted EndingOffset_adjusted Remaining_adjusted
##  Min.   :170.0        Min.   :365.0         Min.   : 540.0    
##  1st Qu.:276.0        1st Qu.:544.0         1st Qu.: 668.0    
##  Median :388.0        Median :593.0         Median : 738.0    
##  Mean   :386.3        Mean   :616.7         Mean   : 748.9    
##  3rd Qu.:459.0        3rd Qu.:694.0         3rd Qu.: 818.0    
##  Max.   :773.0        Max.   :908.0         Max.   :1018.0    
##                                                               
##  GenderMatchTarget_D
##  Min.   :0.0000     
##  1st Qu.:0.0000     
##  Median :1.0000     
##  Mean   :0.5251     
##  3rd Qu.:1.0000     
##  Max.   :1.0000     
## 
head(fix.dat)
## # A tibble: 6 × 40
##    ...1 TOI        Participant Timeline   Bin Bin_duration Competitor Distractor
##   <dbl> <chr>      <chr>       <chr>    <dbl>        <dbl>      <dbl>      <dbl>
## 1     1 Trial01-d… TA105       VersionA    23           10          0          0
## 2     2 Trial01-d… TA105       VersionA    24           10          0          0
## 3     3 Trial01-d… TA105       VersionA    25           10          0          0
## 4     4 Trial01-d… TA105       VersionA    26           10          0          0
## 5     5 Trial01-d… TA105       VersionA    27           10          0          0
## 6     6 Trial01-d… TA105       VersionA    28           10          0          0
## # ℹ 32 more variables: Target <dbl>, TimeMS <dbl>, Animacy_C <chr>,
## #   Animacy_D <chr>, Animacy_T <chr>, Determiner <chr>, N_ending_C <chr>,
## #   N_ending_D <chr>, N_ending_T <chr>, N_Gender_C <chr>, N_Gender_D <chr>,
## #   N_Gender_T <chr>, Phonol_Overlap_C <chr>, Phonol_Overlap_D <lgl>,
## #   Phonol_Overlap_T <chr>, ArticleOnset <dbl>, NounOnset <dbl>,
## #   EndingOnset <dbl>, EndingOffset <dbl>, Remaining <dbl>, SpeakerType <dbl>,
## #   Sex <chr>, Age <dbl>, Position <chr>, Trackloss <lgl>, …
Remove rows where the time bins are not 10ms
fix.dat <- fix.dat %>% filter(Bin_duration == 10)
Add AreaFixated column
fix.dat <- fix.dat %>%
  mutate(AreaFixated = case_when(
    Competitor == 1 ~ "Competitor",
    Distractor == 1 ~ "Distractor",
    Target == 1 ~ "Target"
  ))
Add GenderMatch column
fix.dat <- fix.dat %>%
  mutate(GenderMatch = case_when(
    N_Gender_D == N_Gender_T ~ "D=T",
    N_Gender_D == N_Gender_C ~ "D=C"
  ))
Reshape binomially encoded fixation data to a long format for plotting and analysis

In the original experiment, the eyetracker takes 25 samples in each 50ms bin, and records the proportion of samples that fall in each AOI. Our eyetracker takes 1 sample in each 10ms bin, and records which AOI the sample falls in (we already removed the trackloss bins, so the sample must be in one of the 3 AOIs, and binomially encoded as 0 or 1).

fix.long <- fix.dat %>%
  pivot_longer(
    cols = c(Target, Competitor, Distractor),
    names_to = "AOI",
    values_to = "FixP"
  )
Change Participant, TOI, AreaFixated, SpeakerType, AOI, Phonol_Overlap_T and GenderMatch to factor
fix.long = fix.long %>% mutate_at(vars(Participant, TOI, AreaFixated, SpeakerType, AOI, Phonol_Overlap_T, GenderMatch), as.factor)
Merge the non minimal pair values
fix.long$Phonol_Overlap <- fix.long$Phonol_Overlap_T %>% fct_collapse(NonMinPair = c("First Syll-unstressed", "First Syll-stressed"))
Inspect the data after factoring
head(fix.long)
## # A tibble: 6 × 42
##    ...1 TOI   Participant Timeline   Bin Bin_duration TimeMS Animacy_C Animacy_D
##   <dbl> <fct> <fct>       <chr>    <dbl>        <dbl>  <dbl> <chr>     <chr>    
## 1     1 Tria… TA105       VersionA    23           10    230 Animate-… Animate-…
## 2     1 Tria… TA105       VersionA    23           10    230 Animate-… Animate-…
## 3     1 Tria… TA105       VersionA    23           10    230 Animate-… Animate-…
## 4     2 Tria… TA105       VersionA    24           10    240 Animate-… Animate-…
## 5     2 Tria… TA105       VersionA    24           10    240 Animate-… Animate-…
## 6     2 Tria… TA105       VersionA    24           10    240 Animate-… Animate-…
## # ℹ 33 more variables: Animacy_T <chr>, Determiner <chr>, N_ending_C <chr>,
## #   N_ending_D <chr>, N_ending_T <chr>, N_Gender_C <chr>, N_Gender_D <chr>,
## #   N_Gender_T <chr>, Phonol_Overlap_C <chr>, Phonol_Overlap_D <lgl>,
## #   Phonol_Overlap_T <fct>, ArticleOnset <dbl>, NounOnset <dbl>,
## #   EndingOnset <dbl>, EndingOffset <dbl>, Remaining <dbl>, SpeakerType <fct>,
## #   Sex <chr>, Age <dbl>, Position <chr>, Trackloss <lgl>,
## #   TimeMS_adjusted <dbl>, ArticleOnset_adjusted <dbl>, …
summary(fix.long)
##       ...1                          TOI           Participant     
##  Min.   :     1   Trial30-pos-sapo_0  :  28779   TA131  :  81555  
##  1st Qu.:183183   Trial41-def-mujer_0 :  28770   TA124  :  80841  
##  Median :366326   Trial06-def-brujo_0 :  28491   TA128  :  79215  
##  Mean   :366328   Trial08-def-puerta_0:  28377   TA142  :  76869  
##  3rd Qu.:549482   Trial10-pos-oso_0   :  28374   TA130  :  76527  
##  Max.   :732646   Trial29-pos-mosca_0 :  28338   TA137  :  76455  
##                   (Other)             :2022921   (Other):1722588  
##    Timeline              Bin         Bin_duration     TimeMS    
##  Length:2194050     Min.   :  1.0   Min.   :10    Min.   :  10  
##  Class :character   1st Qu.:180.0   1st Qu.:10    1st Qu.:1800  
##  Mode  :character   Median :342.0   Median :10    Median :3420  
##                     Mean   :337.6   Mean   :10    Mean   :3376  
##                     3rd Qu.:498.0   3rd Qu.:10    3rd Qu.:4980  
##                     Max.   :728.0   Max.   :10    Max.   :7280  
##                                                                 
##   Animacy_C          Animacy_D          Animacy_T          Determiner       
##  Length:2194050     Length:2194050     Length:2194050     Length:2194050    
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##   N_ending_C         N_ending_D         N_ending_T         N_Gender_C       
##  Length:2194050     Length:2194050     Length:2194050     Length:2194050    
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##   N_Gender_D         N_Gender_T        Phonol_Overlap_C   Phonol_Overlap_D
##  Length:2194050     Length:2194050     Length:2194050     Mode:logical    
##  Class :character   Class :character   Class :character   NA's:2194050    
##  Mode  :character   Mode  :character   Mode  :character                   
##                                                                           
##                                                                           
##                                                                           
##                                                                           
##               Phonol_Overlap_T   ArticleOnset    NounOnset     EndingOnset  
##  First Syll-stressed  :636219   Min.   :2403   Min.   :2496   Min.   :2591  
##  First Syll-unstressed:827898   1st Qu.:2448   1st Qu.:2576   1st Qu.:2762  
##  Minimal Pair         :729933   Median :2479   Median :2613   Median :2859  
##                                 Mean   :2490   Mean   :2619   Mean   :2877  
##                                 3rd Qu.:2524   3rd Qu.:2669   3rd Qu.:2972  
##                                 Max.   :2679   Max.   :2807   Max.   :3411  
##                                                                             
##   EndingOffset    Remaining    SpeakerType     Sex                 Age        
##  Min.   :2838   Min.   :3005   1:1008648   Length:2194050     Min.   :19.0    
##  1st Qu.:3020   1st Qu.:3144   2:1185402   Class :character   1st Qu.:20.0    
##  Median :3079   Median :3230               Mode  :character   Median :23.0    
##  Mean   :3107   Mean   :3239                                  Mean   :23.7    
##  3rd Qu.:3196   3rd Qu.:3315                                  3rd Qu.:25.0    
##  Max.   :3546   Max.   :3656                                  Max.   :50.0    
##                                                               NA's   :426369  
##    Position         Trackloss       TimeMS_adjusted   ArticleOnset_adjusted
##  Length:2194050     Mode :logical   Min.   :-2670.0   Min.   :0            
##  Class :character   FALSE:2194050   1st Qu.: -690.0   1st Qu.:0            
##  Mode  :character                   Median :  920.0   Median :0            
##                                     Mean   :  880.9   Mean   :0            
##                                     3rd Qu.: 2480.0   3rd Qu.:0            
##                                     Max.   : 4750.0   Max.   :0            
##                                                                            
##  NounOnset_adjusted EndingOnset_adjusted EndingOffset_adjusted
##  Min.   : 70.0      Min.   :170.0        Min.   :365.0        
##  1st Qu.:113.0      1st Qu.:276.0        1st Qu.:544.0        
##  Median :128.0      Median :388.0        Median :593.0        
##  Mean   :128.3      Mean   :386.3        Mean   :616.7        
##  3rd Qu.:144.0      3rd Qu.:459.0        3rd Qu.:694.0        
##  Max.   :202.0      Max.   :773.0        Max.   :908.0        
##                                                               
##  Remaining_adjusted GenderMatchTarget_D     AreaFixated      GenderMatch  
##  Min.   : 540.0     Min.   :0.0000      Competitor: 456042   D=C:1041837  
##  1st Qu.: 668.0     1st Qu.:0.0000      Distractor: 415515   D=T:1152213  
##  Median : 738.0     Median :1.0000      Target    :1322493                
##  Mean   : 748.9     Mean   :0.5252                                        
##  3rd Qu.: 818.0     3rd Qu.:1.0000                                        
##  Max.   :1018.0     Max.   :1.0000                                        
##                                                                           
##          AOI              FixP             Phonol_Overlap   
##  Competitor:731350   Min.   :0.0000   NonMinPair  :1464117  
##  Distractor:731350   1st Qu.:0.0000   Minimal Pair: 729933  
##  Target    :731350   Median :0.0000                         
##                      Mean   :0.3333                         
##                      3rd Qu.:1.0000                         
##                      Max.   :1.0000                         
## 
Compute the mean, SD, SE and CI for FixP for each time bin, AOI, SpeakerType, Determiner and TOI
fix.means = Rmisc::summarySE(fix.long, measurevar='FixP', groupvars=c('TimeMS_adjusted','SpeakerType', 'AOI', 'Determiner'), na.rm=T)
## Warning in qt(conf.interval/2 + 0.5, datac$N - 1): NaNs produced
head(fix.means)
##   TimeMS_adjusted SpeakerType        AOI Determiner N FixP sd se ci
## 1           -2670           1 Competitor   Definite 5    0  0  0  0
## 2           -2670           1 Distractor   Definite 5    0  0  0  0
## 3           -2670           1     Target   Definite 5    1  0  0  0
## 4           -2670           2 Competitor   Definite 4    0  0  0  0
## 5           -2670           2 Distractor   Definite 4    0  0  0  0
## 6           -2670           2     Target   Definite 4    1  0  0  0
summary(fix.means)
##  TimeMS_adjusted   SpeakerType         AOI        Determiner       
##  Min.   :-2670.0   1:4011      Competitor:2793   Length:8379       
##  1st Qu.: -900.0   2:4368      Distractor:2793   Class :character  
##  Median :  850.0               Target    :2793   Mode  :character  
##  Mean   :  853.9                                                   
##  3rd Qu.: 2590.0                                                   
##  Max.   : 4750.0                                                   
##                                                                    
##        N              FixP              sd               se         
##  Min.   :  1.0   Min.   :0.0000   Min.   :0.0000   Min.   :0.00000  
##  1st Qu.:256.0   1st Qu.:0.1065   1st Qu.:0.2959   1st Qu.:0.01722  
##  Median :285.0   Median :0.3152   Median :0.4342   Median :0.02610  
##  Mean   :261.9   Mean   :0.3333   Mean   :0.3775   Mean   :0.02597  
##  3rd Qu.:303.0   3rd Qu.:0.3711   3rd Qu.:0.4724   3rd Qu.:0.02857  
##  Max.   :364.0   Max.   :1.0000   Max.   :0.5774   Max.   :0.33333  
##                                   NA's   :132      NA's   :132      
##        ci         
##  Min.   :0.00000  
##  1st Qu.:0.03390  
##  Median :0.05136  
##  Mean   :0.05370  
##  3rd Qu.:0.05625  
##  Max.   :1.43422  
##  NA's   :132
Compute Mean, etc. that takes Phonol_Overlap into account
fix.means.phonol = Rmisc::summarySE(fix.long, measurevar='FixP', groupvars=c('TimeMS_adjusted','SpeakerType', 'AOI', 'Determiner', 'Phonol_Overlap'), na.rm=T)
## Warning in qt(conf.interval/2 + 0.5, datac$N - 1): NaNs produced
head(fix.means.phonol)
##   TimeMS_adjusted SpeakerType        AOI Determiner Phonol_Overlap N FixP sd se
## 1           -2670           1 Competitor   Definite   Minimal Pair 5    0  0  0
## 2           -2670           1 Distractor   Definite   Minimal Pair 5    0  0  0
## 3           -2670           1     Target   Definite   Minimal Pair 5    1  0  0
## 4           -2670           2 Competitor   Definite   Minimal Pair 4    0  0  0
## 5           -2670           2 Distractor   Definite   Minimal Pair 4    0  0  0
## 6           -2670           2     Target   Definite   Minimal Pair 4    1  0  0
##   ci
## 1  0
## 2  0
## 3  0
## 4  0
## 5  0
## 6  0
summary(fix.means.phonol)
##  TimeMS_adjusted   SpeakerType         AOI        Determiner       
##  Min.   :-2670.0   1:7941      Competitor:5488   Length:16464      
##  1st Qu.: -890.0   2:8523      Distractor:5488   Class :character  
##  Median :  830.0               Target    :5488   Mode  :character  
##  Mean   :  832.7                                                   
##  3rd Qu.: 2540.0                                                   
##  Max.   : 4750.0                                                   
##                                                                    
##       Phonol_Overlap       N              FixP              sd        
##  NonMinPair  :8229   Min.   :  1.0   Min.   :0.0000   Min.   :0.0000  
##  Minimal Pair:8235   1st Qu.: 92.0   1st Qu.:0.1121   1st Qu.:0.3040  
##                      Median :112.0   Median :0.3084   Median :0.4312  
##                      Mean   :133.3   Mean   :0.3333   Mean   :0.3841  
##                      3rd Qu.:190.0   3rd Qu.:0.3887   3rd Qu.:0.4739  
##                      Max.   :246.0   Max.   :1.0000   Max.   :0.7071  
##                                                       NA's   :384     
##        se               ci        
##  Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.0254   1st Qu.:0.0502  
##  Median :0.0348   Median :0.0686  
##  Mean   :0.0374   Mean   :0.0783  
##  3rd Qu.:0.0461   3rd Qu.:0.0915  
##  Max.   :0.5000   Max.   :6.3531  
##  NA's   :384      NA's   :384
Compute Mean, etc. that takes Phonol_Overlap AND GenderMatch into account
fix.means.fourgroups = Rmisc::summarySE(fix.long, measurevar='FixP', groupvars=c('TimeMS_adjusted','SpeakerType', 'AOI', 'Determiner', 'Phonol_Overlap', 'GenderMatch'), na.rm=T)
## Warning in qt(conf.interval/2 + 0.5, datac$N - 1): NaNs produced
head(fix.means.fourgroups)
##   TimeMS_adjusted SpeakerType        AOI Determiner Phonol_Overlap GenderMatch
## 1           -2670           1 Competitor   Definite   Minimal Pair         D=C
## 2           -2670           1 Distractor   Definite   Minimal Pair         D=C
## 3           -2670           1     Target   Definite   Minimal Pair         D=C
## 4           -2670           2 Competitor   Definite   Minimal Pair         D=C
## 5           -2670           2 Distractor   Definite   Minimal Pair         D=C
## 6           -2670           2     Target   Definite   Minimal Pair         D=C
##   N FixP sd se ci
## 1 5    0  0  0  0
## 2 5    0  0  0  0
## 3 5    1  0  0  0
## 4 4    0  0  0  0
## 5 4    0  0  0  0
## 6 4    1  0  0  0
summary(fix.means.fourgroups)
##  TimeMS_adjusted SpeakerType         AOI         Determiner       
##  Min.   :-2670   1:15756     Competitor:10768   Length:32304      
##  1st Qu.: -900   2:16548     Distractor:10768   Class :character  
##  Median :  790               Target    :10768   Mode  :character  
##  Mean   :  790                                                    
##  3rd Qu.: 2470                                                    
##  Max.   : 4750                                                    
##                                                                   
##       Phonol_Overlap  GenderMatch       N               FixP       
##  NonMinPair  :16155   D=C:16068   Min.   :  1.00   Min.   :0.0000  
##  Minimal Pair:16149   D=T:16236   1st Qu.: 44.00   1st Qu.:0.1143  
##                                   Median : 62.00   Median :0.3000  
##                                   Mean   : 67.92   Mean   :0.3333  
##                                   3rd Qu.: 95.00   3rd Qu.:0.4054  
##                                   Max.   :142.00   Max.   :1.0000  
##                                                                    
##        sd               se               ci        
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.3043   1st Qu.:0.0354   1st Qu.:0.0708  
##  Median :0.4280   Median :0.0489   Median :0.0975  
##  Mean   :0.3828   Mean   :0.0516   Mean   :0.1126  
##  3rd Qu.:0.4764   3rd Qu.:0.0635   3rd Qu.:0.1276  
##  Max.   :0.7071   Max.   :0.5000   Max.   :6.3531  
##  NA's   :525      NA's   :525      NA's   :525
Compute Mean, etc. that takes GenderMatch into account
fix.means.gendermatch = Rmisc::summarySE(fix.long, measurevar='FixP', groupvars=c('TimeMS_adjusted','SpeakerType', 'AOI', 'Determiner', 'GenderMatch'), na.rm=T)
## Warning in qt(conf.interval/2 + 0.5, datac$N - 1): NaNs produced
head(fix.means.gendermatch)
##   TimeMS_adjusted SpeakerType        AOI Determiner GenderMatch N FixP sd se ci
## 1           -2670           1 Competitor   Definite         D=C 5    0  0  0  0
## 2           -2670           1 Distractor   Definite         D=C 5    0  0  0  0
## 3           -2670           1     Target   Definite         D=C 5    1  0  0  0
## 4           -2670           2 Competitor   Definite         D=C 4    0  0  0  0
## 5           -2670           2 Distractor   Definite         D=C 4    0  0  0  0
## 6           -2670           2     Target   Definite         D=C 4    1  0  0  0
summary(fix.means.gendermatch)
##  TimeMS_adjusted   SpeakerType         AOI        Determiner        GenderMatch
##  Min.   :-2670.0   1:7953      Competitor:5501   Length:16503       D=C:8202   
##  1st Qu.: -890.0   2:8550      Distractor:5501   Class :character   D=T:8301   
##  Median :  830.0               Target    :5501   Mode  :character              
##  Mean   :  833.2                                                               
##  3rd Qu.: 2550.0                                                               
##  Max.   : 4750.0                                                               
##                                                                                
##        N              FixP              sd               se        
##  Min.   :  1.0   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:127.0   1st Qu.:0.1078   1st Qu.:0.2992   1st Qu.:0.0248  
##  Median :141.0   Median :0.3101   Median :0.4337   Median :0.0366  
##  Mean   :132.9   Mean   :0.3333   Mean   :0.3830   Mean   :0.0359  
##  3rd Qu.:156.0   3rd Qu.:0.3816   3rd Qu.:0.4738   3rd Qu.:0.0406  
##  Max.   :196.0   Max.   :1.0000   Max.   :0.7071   Max.   :0.5000  
##                                   NA's   :405      NA's   :405     
##        ci        
##  Min.   :0.0000  
##  1st Qu.:0.0490  
##  Median :0.0723  
##  Mean   :0.0754  
##  3rd Qu.:0.0803  
##  Max.   :6.3531  
##  NA's   :405

2. Plotting

Plot mean fixation proportion to each AOI with standard error, dividing by SpeakerType (ASI and HSS) and Determiner type (Definite and Possessive)
fix.plot = ggplot() + 
  facet_wrap(~Determiner + SpeakerType, ncol=2,
             labeller = labeller(
              Determiner = c(
                "Definite" = "Definite determiner",
                "Possessive" = "Possessive determiner"
              ),
              SpeakerType = c(
                "1" = "ASI",
                "2" = "HSS"
              )
            )
             ) + theme_bw() + 
  geom_vline(xintercept = 0, linetype = "dashed", color = "black", linewidth = 0.8) +
  geom_line(data=fix.means,aes(x=TimeMS_adjusted, y=FixP, group=AOI, colour=AOI, lty=AOI), lwd=1.5) + # TODO use SE or CI here?
  geom_ribbon(data=fix.means,aes(x=TimeMS_adjusted,ymin=FixP-se,ymax=FixP+se,color=AOI,fill=AOI), linewidth=.2, alpha=.3, lty="dashed", show.legend=F)  +
  labs(y="Fixation Proportion", x="Time relative to the target word onset (ms)") + 
  
  scale_x_continuous(limits=c(-300, 1400),expand=c(0,0),breaks=seq(-300, 1400, 200)) +
  scale_color_manual('AOI', values=c("#af2013","#002e65","#ffac00")) +
  scale_fill_manual('AOI', values=c("#af2013","#002e65","#ffac00")) +
  scale_linetype_manual('AOI', values=c("solid","dotted", "dotdash")) +
  
  theme(
    text=element_text(size=20),
    legend.key.height=unit(.3,"in"),
    legend.key.width=unit(.6,"in"),
    panel.spacing=unit(1, "in")) 

fix.plot
## Warning: Removed 1686 rows containing missing values or values outside the scale range
## (`geom_line()`).

Save the plot
ggsave("plots/fixation_plot.png", width = 20, height = 15, dpi = 300)
## Warning: Removed 1686 rows containing missing values or values outside the scale range
## (`geom_line()`).
Plot mean fixation proportion to each AOI with standard error, dividing by SpeakerType (ASI and HSS), Determiner type (Definite and Possessive) and Phonological Overlap (Minimal Pair and Non Minimal Pair)
fix.plot.phonol = ggplot() + 
  facet_wrap(~Determiner + SpeakerType + Phonol_Overlap, ncol=4,
             labeller = labeller(
              Determiner = c(
                "Definite" = "Definite determiner",
                "Possessive" = "Possessive determiner"
              ),
              SpeakerType = c(
                "1" = "ASI",
                "2" = "HSS"
              ),
              Phonol_Overlap = c(
                "Minimal Pair" = "Minimal Pair",
                "NonMinPair" = "Non Minimal Pair"
              )
            )
             ) + theme_bw() + 
  geom_vline(xintercept = 0, linetype = "dashed", color = "black", linewidth = 0.8) +
  geom_line(data=fix.means.phonol,aes(x=TimeMS_adjusted, y=FixP, group=AOI, colour=AOI, lty=AOI), lwd=1.5) + # TODO use SE or CI here?
  geom_ribbon(data=fix.means.phonol,aes(x=TimeMS_adjusted,ymin=FixP-se,ymax=FixP+se,color=AOI,fill=AOI), linewidth=.2, alpha=.3, lty="dashed", show.legend=F)  +
  labs(y="Fixation Proportion", x="Time relative to the target word onset (ms)") + 
  
  scale_x_continuous(limits=c(-300, 1400),expand=c(0,0),breaks=seq(-300, 1400, 200)) +
  scale_color_manual('AOI', values=c("#af2013","#002e65","#ffac00")) +
  scale_fill_manual('AOI', values=c("#af2013","#002e65","#ffac00")) +
  scale_linetype_manual('AOI', values=c("solid","dotted", "dotdash")) +
  
  theme(
    text=element_text(size=20),
    legend.key.height=unit(.3,"in"),
    legend.key.width=unit(.6,"in"),
    panel.spacing=unit(1, "in")) 

fix.plot.phonol
## Warning: Removed 1686 rows containing missing values or values outside the scale range
## (`geom_line()`).

Save the plot
ggsave("plots/fixation_plot_phonol.png", width = 40, height = 15, dpi = 300)
## Warning: Removed 1686 rows containing missing values or values outside the scale range
## (`geom_line()`).
Plot mean fixation proportion to each AOI with standard error, dividing by SpeakerType (ASI and HSS), Determiner type (Definite and Possessive), Phonological Overlap (Minimal Pair and Non Minimal Pair) and GenderMatch (D=T and D=C)
fix.plot.fourgroups = ggplot() + 
  facet_wrap(~Determiner + SpeakerType + Phonol_Overlap + GenderMatch, ncol=4,
             labeller = labeller(
              Determiner = c(
                "Definite" = "Definite determiner",
                "Possessive" = "Possessive determiner"
              ),
              SpeakerType = c(
                "1" = "ASI",
                "2" = "HSS"
              ),
              Phonol_Overlap = c(
                "Minimal Pair" = "Minimal Pair",
                "NonMinPair" = "Non Minimal Pair"
              ),
              GenderMatch = c(
                "D=T" = "Distractor = Target",
                "D=C" = "Distractor = Competitor"
              )
            )
             ) + theme_bw() + 
  geom_vline(xintercept = 0, linetype = "dashed", color = "black", linewidth = 0.8) +
  geom_line(data=fix.means.fourgroups,aes(x=TimeMS_adjusted, y=FixP, group=AOI, colour=AOI, lty=AOI), lwd=1.5) + # TODO use SE or CI here?
  geom_ribbon(data=fix.means.fourgroups,aes(x=TimeMS_adjusted,ymin=FixP-se,ymax=FixP+se,color=AOI,fill=AOI), linewidth=.2, alpha=.3, lty="dashed", show.legend=F)  +
  labs(y="Fixation Proportion", x="Time relative to the target word onset (ms)") + 
  
  scale_x_continuous(limits=c(-300, 1400),expand=c(0,0),breaks=seq(-300, 1400, 200)) +
  scale_color_manual('AOI', values=c("#af2013","#002e65","#ffac00")) +
  scale_fill_manual('AOI', values=c("#af2013","#002e65","#ffac00")) +
  scale_linetype_manual('AOI', values=c("solid","dotted", "dotdash")) +
  
  theme(
    text=element_text(size=20),
    legend.key.height=unit(.3,"in"),
    legend.key.width=unit(.6,"in"),
    panel.spacing=unit(1, "in")) 

fix.plot.fourgroups
## Warning: Removed 1686 rows containing missing values or values outside the scale range
## (`geom_line()`).

Save the plot
ggsave("plots/fixation_plot_fourgroups.png", width = 40, height = 30, dpi = 300)
## Warning: Removed 1686 rows containing missing values or values outside the scale range
## (`geom_line()`).
Plot mean fixation proportion to each AOI with standard error, dividing by SpeakerType (ASI and HSS), Determiner type (Definite and Possessive) and GenderMatch (D=T and D=C)
fix.plot.gendermatch = ggplot() + 
  facet_wrap(~Determiner + SpeakerType + GenderMatch, ncol=4,
             labeller = labeller(
              Determiner = c(
                "Definite" = "Definite determiner",
                "Possessive" = "Possessive determiner"
              ),
              SpeakerType = c(
                "1" = "ASI",
                "2" = "HSS"
              ),
              GenderMatch = c(
                "D=T" = "Distractor = Target",
                "D=C" = "Distractor = Competitor"
              )
            )
             ) + theme_bw() + 
  geom_vline(xintercept = 0, linetype = "dashed", color = "black", linewidth = 0.8) +
  geom_line(data=fix.means.gendermatch,aes(x=TimeMS_adjusted, y=FixP, group=AOI, colour=AOI, lty=AOI), lwd=1.5) + # TODO use SE or CI here?
  geom_ribbon(data=fix.means.gendermatch,aes(x=TimeMS_adjusted,ymin=FixP-se,ymax=FixP+se,color=AOI,fill=AOI), linewidth=.2, alpha=.3, lty="dashed", show.legend=F)  +
  labs(y="Fixation Proportion", x="Time relative to the target word onset (ms)") + 
  
  scale_x_continuous(limits=c(-300, 1400),expand=c(0,0),breaks=seq(-300, 1400, 200)) +
  scale_color_manual('AOI', values=c("#af2013","#002e65","#ffac00")) +
  scale_fill_manual('AOI', values=c("#af2013","#002e65","#ffac00")) +
  scale_linetype_manual('AOI', values=c("solid","dotted", "dotdash")) +
  
  theme(
    text=element_text(size=20),
    legend.key.height=unit(.3,"in"),
    legend.key.width=unit(.6,"in"),
    panel.spacing=unit(1, "in")) 

fix.plot.gendermatch
## Warning: Removed 1686 rows containing missing values or values outside the scale range
## (`geom_line()`).

Save the plot
ggsave("plots/fixation_plot_gendermatch.png", width = 40, height = 15, dpi = 300)
## Warning: Removed 1686 rows containing missing values or values outside the scale range
## (`geom_line()`).
Combine Competitor and Distractor to create NonTarget columns and reduce the data set to two variables
fix.twovar <- fix.dat %>%
  mutate(NonTarget = Competitor + Distractor) %>%
  select(-Competitor, -Distractor)

fix.twovar <- fix.twovar %>%
  mutate(AreaFixated = ifelse(AreaFixated == "Target", "Target", "NonTarget"))

fix.twovar$AreaFixated <- as.factor(fix.twovar$AreaFixated)
Pivot binomially encoded columns
fix.twovar <- fix.twovar %>%
  pivot_longer(
    cols = c(Target, NonTarget),
    names_to = "AOI",
    values_to = "FixP"
  )

fix.twovar = fix.twovar %>% mutate_at(vars(AOI), as.factor)
Merge the non minimal pair values
fix.twovar$Phonol_Overlap <- fix.twovar$Phonol_Overlap_T %>% fct_collapse(NonMinPair = c("First Syll-unstressed", "First Syll-stressed"))

summary(fix.twovar)
##       ...1            TOI            Participant          Timeline        
##  Min.   :     1   Length:1462700     Length:1462700     Length:1462700    
##  1st Qu.:183183   Class :character   Class :character   Class :character  
##  Median :366326   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :366328                                                           
##  3rd Qu.:549482                                                           
##  Max.   :732646                                                           
##                                                                           
##       Bin         Bin_duration     TimeMS      Animacy_C        
##  Min.   :  1.0   Min.   :10    Min.   :  10   Length:1462700    
##  1st Qu.:180.0   1st Qu.:10    1st Qu.:1800   Class :character  
##  Median :342.0   Median :10    Median :3420   Mode  :character  
##  Mean   :337.6   Mean   :10    Mean   :3376                     
##  3rd Qu.:498.0   3rd Qu.:10    3rd Qu.:4980                     
##  Max.   :728.0   Max.   :10    Max.   :7280                     
##                                                                 
##   Animacy_D          Animacy_T          Determiner         N_ending_C       
##  Length:1462700     Length:1462700     Length:1462700     Length:1462700    
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##   N_ending_D         N_ending_T         N_Gender_C         N_Gender_D       
##  Length:1462700     Length:1462700     Length:1462700     Length:1462700    
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##   N_Gender_T        Phonol_Overlap_C   Phonol_Overlap_D Phonol_Overlap_T  
##  Length:1462700     Length:1462700     Mode:logical     Length:1462700    
##  Class :character   Class :character   NA's:1462700     Class :character  
##  Mode  :character   Mode  :character                    Mode  :character  
##                                                                           
##                                                                           
##                                                                           
##                                                                           
##   ArticleOnset    NounOnset     EndingOnset    EndingOffset    Remaining   
##  Min.   :2403   Min.   :2496   Min.   :2591   Min.   :2838   Min.   :3005  
##  1st Qu.:2448   1st Qu.:2576   1st Qu.:2762   1st Qu.:3020   1st Qu.:3144  
##  Median :2479   Median :2613   Median :2859   Median :3079   Median :3230  
##  Mean   :2490   Mean   :2619   Mean   :2877   Mean   :3107   Mean   :3239  
##  3rd Qu.:2524   3rd Qu.:2669   3rd Qu.:2972   3rd Qu.:3196   3rd Qu.:3315  
##  Max.   :2679   Max.   :2807   Max.   :3411   Max.   :3546   Max.   :3656  
##                                                                            
##   SpeakerType       Sex                 Age           Position        
##  Min.   :1.00   Length:1462700     Min.   :19.00    Length:1462700    
##  1st Qu.:1.00   Class :character   1st Qu.:20.00    Class :character  
##  Median :2.00   Mode  :character   Median :23.00    Mode  :character  
##  Mean   :1.54                      Mean   :23.75                      
##  3rd Qu.:2.00                      3rd Qu.:25.00                      
##  Max.   :2.00                      Max.   :50.00                      
##                                    NA's   :284246                     
##  Trackloss       TimeMS_adjusted   ArticleOnset_adjusted NounOnset_adjusted
##  Mode :logical   Min.   :-2670.0   Min.   :0             Min.   : 70.0     
##  FALSE:1462700   1st Qu.: -690.0   1st Qu.:0             1st Qu.:113.0     
##                  Median :  920.0   Median :0             Median :128.0     
##                  Mean   :  880.9   Mean   :0             Mean   :128.3     
##                  3rd Qu.: 2480.0   3rd Qu.:0             3rd Qu.:144.0     
##                  Max.   : 4750.0   Max.   :0             Max.   :202.0     
##                                                                            
##  EndingOnset_adjusted EndingOffset_adjusted Remaining_adjusted
##  Min.   :170.0        Min.   :365.0         Min.   : 540.0    
##  1st Qu.:276.0        1st Qu.:544.0         1st Qu.: 668.0    
##  Median :388.0        Median :593.0         Median : 738.0    
##  Mean   :386.3        Mean   :616.7         Mean   : 748.9    
##  3rd Qu.:459.0        3rd Qu.:694.0         3rd Qu.: 818.0    
##  Max.   :773.0        Max.   :908.0         Max.   :1018.0    
##                                                               
##  GenderMatchTarget_D    AreaFixated     GenderMatch               AOI        
##  Min.   :0.0000      NonTarget:581038   Length:1462700     NonTarget:731350  
##  1st Qu.:0.0000      Target   :881662   Class :character   Target   :731350  
##  Median :1.0000                         Mode  :character                     
##  Mean   :0.5252                                                              
##  3rd Qu.:1.0000                                                              
##  Max.   :1.0000                                                              
##                                                                              
##       FixP          Phonol_Overlap  
##  Min.   :0.0   NonMinPair  :976078  
##  1st Qu.:0.0   Minimal Pair:486622  
##  Median :0.5                        
##  Mean   :0.5                        
##  3rd Qu.:1.0                        
##  Max.   :1.0                        
## 
head(fix.twovar)
## # A tibble: 6 × 42
##    ...1 TOI   Participant Timeline   Bin Bin_duration TimeMS Animacy_C Animacy_D
##   <dbl> <chr> <chr>       <chr>    <dbl>        <dbl>  <dbl> <chr>     <chr>    
## 1     1 Tria… TA105       VersionA    23           10    230 Animate-… Animate-…
## 2     1 Tria… TA105       VersionA    23           10    230 Animate-… Animate-…
## 3     2 Tria… TA105       VersionA    24           10    240 Animate-… Animate-…
## 4     2 Tria… TA105       VersionA    24           10    240 Animate-… Animate-…
## 5     3 Tria… TA105       VersionA    25           10    250 Animate-… Animate-…
## 6     3 Tria… TA105       VersionA    25           10    250 Animate-… Animate-…
## # ℹ 33 more variables: Animacy_T <chr>, Determiner <chr>, N_ending_C <chr>,
## #   N_ending_D <chr>, N_ending_T <chr>, N_Gender_C <chr>, N_Gender_D <chr>,
## #   N_Gender_T <chr>, Phonol_Overlap_C <chr>, Phonol_Overlap_D <lgl>,
## #   Phonol_Overlap_T <chr>, ArticleOnset <dbl>, NounOnset <dbl>,
## #   EndingOnset <dbl>, EndingOffset <dbl>, Remaining <dbl>, SpeakerType <dbl>,
## #   Sex <chr>, Age <dbl>, Position <chr>, Trackloss <lgl>,
## #   TimeMS_adjusted <dbl>, ArticleOnset_adjusted <dbl>, …
Compute mean fixation proportions for the two-variable version
fix.means.twovar = Rmisc::summarySE(fix.twovar, measurevar='FixP', groupvars=c('TimeMS_adjusted','SpeakerType', 'AOI', 'Determiner'), na.rm=T)
## Warning in qt(conf.interval/2 + 0.5, datac$N - 1): NaNs produced
fix.means.twovar = fix.means.twovar %>% mutate_at(vars(SpeakerType, Determiner), as.factor)

summary(fix.means.twovar)
##  TimeMS_adjusted   SpeakerType        AOI            Determiner  
##  Min.   :-2670.0   1:2674      NonTarget:2793   Definite  :2792  
##  1st Qu.: -900.0   2:2912      Target   :2793   Possessive:2794  
##  Median :  850.0                                                 
##  Mean   :  853.9                                                 
##  3rd Qu.: 2590.0                                                 
##  Max.   : 4750.0                                                 
##                                                                  
##        N              FixP              sd               se         
##  Min.   :  1.0   Min.   :0.0000   Min.   :0.0000   Min.   :0.00000  
##  1st Qu.:256.0   1st Qu.:0.2891   1st Qu.:0.3629   1st Qu.:0.02152  
##  Median :285.0   Median :0.5000   Median :0.4597   Median :0.02706  
##  Mean   :261.9   Mean   :0.5000   Mean   :0.4136   Mean   :0.02904  
##  3rd Qu.:303.0   3rd Qu.:0.7109   3rd Qu.:0.4802   3rd Qu.:0.02918  
##  Max.   :364.0   Max.   :1.0000   Max.   :0.5774   Max.   :0.33333  
##                                   NA's   :88       NA's   :88       
##        ci         
##  Min.   :0.00000  
##  1st Qu.:0.04236  
##  Median :0.05324  
##  Mean   :0.06087  
##  3rd Qu.:0.05745  
##  Max.   :1.43422  
##  NA's   :88
head(fix.means.twovar)
##   TimeMS_adjusted SpeakerType       AOI Determiner N FixP sd se ci
## 1           -2670           1 NonTarget   Definite 5    0  0  0  0
## 2           -2670           1    Target   Definite 5    1  0  0  0
## 3           -2670           2 NonTarget   Definite 4    0  0  0  0
## 4           -2670           2    Target   Definite 4    1  0  0  0
## 5           -2660           1 NonTarget   Definite 5    0  0  0  0
## 6           -2660           1    Target   Definite 5    1  0  0  0
Plot mean fixation proportion vs. time with the two-variable version of the data
fix.plot.twovar = ggplot() + 
  facet_wrap(~Determiner + SpeakerType, ncol=2,
             labeller = labeller(
              Determiner = c(
                "Definite" = "Definite determiner",
                "Possessive" = "Possessive determiner"
              ),
              SpeakerType = c(
                "1" = "ASI",
                "2" = "HSS"
              )
            )
             ) + theme_bw() + 
  geom_vline(xintercept = 0, linetype = "dashed", color = "black", linewidth = 0.8) +
  geom_line(data=fix.means.twovar,aes(x=TimeMS_adjusted, y=FixP, group=AOI, colour=AOI, lty=AOI), lwd=1.5) +
  geom_ribbon(data=fix.means.twovar,aes(x=TimeMS_adjusted,ymin=FixP-se,ymax=FixP+se,color=AOI,fill=AOI), linewidth=.2, alpha=.3, lty="dashed", show.legend=F)  +
  labs(y="Fixation Proportion", x="Time relative to the target word onset (ms)") + 
  
  scale_x_continuous(limits=c(-300, 1400),expand=c(0,0),breaks=seq(-300, 1400, 200)) +
  scale_color_manual('AOI', values=c("#969798","#ffac00")) +
  scale_fill_manual('AOI', values=c("#969798","#ffac00")) +
  scale_linetype_manual('AOI', values=c("solid","dotted")) +
  
  theme(
    text=element_text(size=20),
    legend.key.height=unit(.3,"in"),
    legend.key.width=unit(.6,"in"),
    panel.spacing=unit(1, "in")) 

fix.plot.twovar
## Warning: Removed 1124 rows containing missing values or values outside the scale range
## (`geom_line()`).

Save the plot
ggsave("plots/fixation_plot_twovar.png", width = 20, height = 15, dpi = 300)
## Warning: Removed 1124 rows containing missing values or values outside the scale range
## (`geom_line()`).

3. DPA

Here we will run a by-participant analysis testing the divergence between the Target and NonTarget AOIs.

Select time window and create stratification variables
div.dat = fix.twovar %>%  
  filter(1400>=TimeMS_adjusted & TimeMS_adjusted>=-300) %>% 
  dplyr::mutate(StrataVars=paste(Participant, AOI, TimeMS_adjusted, sep='')) %>% 
  mutate_at(vars(SpeakerType, StrataVars),as.factor) %>% 
  droplevels()

summary(div.dat)
##       ...1            TOI            Participant          Timeline        
##  Min.   :    85   Length:393148      Length:393148      Length:393148     
##  1st Qu.:187259   Class :character   Class :character   Class :character  
##  Median :372068   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :369144                                                           
##  3rd Qu.:553190                                                           
##  Max.   :732480                                                           
##                                                                           
##       Bin         Bin_duration     TimeMS      Animacy_C        
##  Min.   :211.0   Min.   :10    Min.   :2110   Length:393148     
##  1st Qu.:265.0   1st Qu.:10    1st Qu.:2650   Class :character  
##  Median :307.0   Median :10    Median :3070   Mode  :character  
##  Mean   :306.1   Mean   :10    Mean   :3061                     
##  3rd Qu.:347.0   3rd Qu.:10    3rd Qu.:3470                     
##  Max.   :408.0   Max.   :10    Max.   :4080                     
##                                                                 
##   Animacy_D          Animacy_T          Determiner         N_ending_C       
##  Length:393148      Length:393148      Length:393148      Length:393148     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##   N_ending_D         N_ending_T         N_Gender_C         N_Gender_D       
##  Length:393148      Length:393148      Length:393148      Length:393148     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##   N_Gender_T        Phonol_Overlap_C   Phonol_Overlap_D Phonol_Overlap_T  
##  Length:393148      Length:393148      Mode:logical     Length:393148     
##  Class :character   Class :character   NA's:393148      Class :character  
##  Mode  :character   Mode  :character                    Mode  :character  
##                                                                           
##                                                                           
##                                                                           
##                                                                           
##   ArticleOnset    NounOnset     EndingOnset    EndingOffset    Remaining   
##  Min.   :2403   Min.   :2496   Min.   :2591   Min.   :2838   Min.   :3005  
##  1st Qu.:2450   1st Qu.:2576   1st Qu.:2762   1st Qu.:3020   1st Qu.:3144  
##  Median :2479   Median :2614   Median :2871   Median :3079   Median :3243  
##  Mean   :2491   Mean   :2620   Mean   :2879   Mean   :3108   Mean   :3240  
##  3rd Qu.:2524   3rd Qu.:2669   3rd Qu.:2974   3rd Qu.:3196   3rd Qu.:3315  
##  Max.   :2679   Max.   :2807   Max.   :3411   Max.   :3546   Max.   :3656  
##                                                                            
##  SpeakerType     Sex                 Age          Position        
##  1:183908    Length:393148      Min.   :19.00   Length:393148     
##  2:209240    Class :character   1st Qu.:20.00   Class :character  
##              Mode  :character   Median :23.00   Mode  :character  
##                                 Mean   :23.79                     
##                                 3rd Qu.:25.00                     
##                                 Max.   :50.00                     
##                                 NA's   :77128                     
##  Trackloss       TimeMS_adjusted  ArticleOnset_adjusted NounOnset_adjusted
##  Mode :logical   Min.   :-300.0   Min.   :0             Min.   : 70.0     
##  FALSE:393148    1st Qu.: 160.0   1st Qu.:0             1st Qu.:113.0     
##                  Median : 580.0   Median :0             Median :128.0     
##                  Mean   : 564.7   Mean   :0             Mean   :128.4     
##                  3rd Qu.: 980.0   3rd Qu.:0             3rd Qu.:144.0     
##                  Max.   :1400.0   Max.   :0             Max.   :202.0     
##                                                                           
##  EndingOnset_adjusted EndingOffset_adjusted Remaining_adjusted
##  Min.   :170.0        Min.   :365           Min.   : 540.0    
##  1st Qu.:276.0        1st Qu.:544           1st Qu.: 668.0    
##  Median :388.0        Median :594           Median : 739.0    
##  Mean   :387.4        Mean   :617           Mean   : 749.4    
##  3rd Qu.:459.0        3rd Qu.:694           3rd Qu.: 818.0    
##  Max.   :773.0        Max.   :908           Max.   :1018.0    
##                                                               
##  GenderMatchTarget_D    AreaFixated     GenderMatch               AOI        
##  Min.   :0.0000      NonTarget:192712   Length:393148      NonTarget:196574  
##  1st Qu.:0.0000      Target   :200436   Class :character   Target   :196574  
##  Median :1.0000                         Mode  :character                     
##  Mean   :0.5224                                                              
##  3rd Qu.:1.0000                                                              
##  Max.   :1.0000                                                              
##                                                                              
##       FixP          Phonol_Overlap                StrataVars    
##  Min.   :0.0   NonMinPair  :260806   TA141NonTarget890 :    48  
##  1st Qu.:0.0   Minimal Pair:132342   TA141NonTarget960 :    48  
##  Median :0.5                         TA141Target890    :    48  
##  Mean   :0.5                         TA141Target960    :    48  
##  3rd Qu.:1.0                         TA105NonTarget1080:    47  
##  Max.   :1.0                         TA105NonTarget1110:    47  
##                                      (Other)           :392862
head(div.dat)
## # A tibble: 6 × 43
##    ...1 TOI   Participant Timeline   Bin Bin_duration TimeMS Animacy_C Animacy_D
##   <dbl> <chr> <chr>       <chr>    <dbl>        <dbl>  <dbl> <chr>     <chr>    
## 1    85 Tria… TA105       VersionA   324           10   3240 Animate-… Animate-…
## 2    85 Tria… TA105       VersionA   324           10   3240 Animate-… Animate-…
## 3    86 Tria… TA105       VersionA   325           10   3250 Animate-… Animate-…
## 4    86 Tria… TA105       VersionA   325           10   3250 Animate-… Animate-…
## 5    87 Tria… TA105       VersionA   326           10   3260 Animate-… Animate-…
## 6    87 Tria… TA105       VersionA   326           10   3260 Animate-… Animate-…
## # ℹ 34 more variables: Animacy_T <chr>, Determiner <chr>, N_ending_C <chr>,
## #   N_ending_D <chr>, N_ending_T <chr>, N_Gender_C <chr>, N_Gender_D <chr>,
## #   N_Gender_T <chr>, Phonol_Overlap_C <chr>, Phonol_Overlap_D <lgl>,
## #   Phonol_Overlap_T <chr>, ArticleOnset <dbl>, NounOnset <dbl>,
## #   EndingOnset <dbl>, EndingOffset <dbl>, Remaining <dbl>, SpeakerType <fct>,
## #   Sex <chr>, Age <dbl>, Position <chr>, Trackloss <lgl>,
## #   TimeMS_adjusted <dbl>, ArticleOnset_adjusted <dbl>, …
Create two subsets based on the type of determiner
div.dat.def <- div.dat %>% filter(Determiner == "Definite")
div.dat.pos <- div.dat %>% filter(Determiner == "Possessive")
Define bootstrap functions to compute a divergence point for the target vs. unrelated conditions

Number of consecutive statistically significant time bins required to establish divergence, and number of bootstrap iterations

Nbins = 20
Niter = 100L

Bootstrap function using lmer

boot_L1L2_lmer = function(original_data, resample_indices){
  dat = original_data[resample_indices, ]
  
  # a statistical test at each timepoint for each group
  test_g1 = dat %>% # t-test for L1 group
    subset(SpeakerType == 1) %>% group_by(TimeMS_adjusted) %>%
    dplyr::summarise(t = summary(lmer(FixP ~ AOI + (1|Participant)))$coefficients["AOITarget", "t value"])

  test_g2 = dat %>% # t-test for L2 group
    subset(SpeakerType == 2) %>% group_by(TimeMS_adjusted) %>%
    dplyr::summarise(t = summary(lmer(FixP ~ AOI + (1|Participant)))$coefficients["AOITarget", "t value"])

  # return a TRUE/FALSE vector of significant positive t-scores
  # (positive means more looks to the target than unrelated)
  t_g1 = test_g1$t > 1.96
  t_g2 = test_g2$t > 1.96
  
  # create empty vectors to store onsets
  onset_g1 = onset_g2 = c()
  
  # find the index of the earliest run of sequential TRUEs; threshold is Nbins
  for (i in 1:(length(t_g2)-Nbins)) {
    onset_g1[i] = sum(t_g1[i:(i+Nbins-1)]) == Nbins
    onset_g2[i] = sum(t_g2[i:(i+Nbins-1)]) == Nbins
  }
  
  # find the difference between onsets
  delta_g1g2 = which(onset_g2)[1] - which(onset_g1)[1]
  
  # print 
  # note: the bootstrap returns the indices of the respective timepoints, not absolute times. 
  # The annotations to the right of each index (e.g. t[,1]) indicate where in the boot object the bootstrapped onset distributions can be found.
  c(delta_g1g2,         # onset difference L1 vs. L2 t[,1]
    which(onset_g1)[1], # onset bin for looks to target L1 t[,2]
    which(onset_g2)[1])  # onset bin for looks to target L2 t[,3]
}

Run the lmer bootstrap on the two subsets by determiner type

bootres_L1L2_lmer_def = boot::boot(
  data = div.dat.def,  # data set to bootstrap       
  statistic = boot_L1L2_lmer,  # bootstrap function      
  strata = div.dat.def$StrataVars, # stratification variable 
  R = Niter)  # number of iterations

bootres_L1L2_lmer_pos = boot::boot(
  data = div.dat.pos,  # data set to bootstrap       
  statistic = boot_L1L2_lmer,  # bootstrap function      
  strata = div.dat.pos$StrataVars, # stratification variable 
  R = Niter)  # number of iterations

bootres_L1L2_lmer_def
bootres_L1L2_lmer_pos

Save the lmer results

save(bootres_L1L2_lmer_def, file="DPA_results.lmer.def.rds")
save(bootres_L1L2_lmer_pos, file="DPA_results.lmer.pos.rds")
Running the lmer bootstrap by trial

This ended up yielding mostly NAs due to the small sample size.

Define a function that runs the bootstrap function for all the trials

run_boot_per_trial <- function(toi_id, df, Niter = 100, strata_col = "StrataVars") {
  trial_data <- df %>% filter(TOI == toi_id) %>% droplevels()

  boot_result <- boot::boot(
    data = trial_data,
    statistic = boot_L1L2_glm,  # your divergence function
    strata = trial_data[[strata_col]],
    R = Niter
  )

  # Extract metadata from the first row (assuming Determiner etc. are constant per trial)
  trial_info <- trial_data %>%
    slice(1) %>%
    select(TOI, Determiner, Phonol_Overlap, GenderMatch)  # add more if needed

  list(boot = boot_result, info = trial_info)
}

Run the function that runs the bootstrap function

trial_ids <- unique(div.dat$TOI)

# Store both the bootstrap and metadata
boot_trials <- lapply(trial_ids, function(toi) run_boot_per_trial(toi, div.dat, Niter = 100))
names(boot_trials) <- trial_ids

Extract divergence values + metadata into a summary dataframe

# Function to extract means and metadata
extract_div_summary <- function(result) {
  bootres <- result$boot
  info <- result$info

  tibble::tibble(
    TOI = info$TOI,
    Determiner = info$Determiner,
    Phonol_Overlap = info$Phonol_Overlap,
    GenderMatch = info$GenderMatchTarget_D,
    ASI_divergence = mean(bootres$t[,2], na.rm = TRUE) * 10 - 10,
    HSS_divergence = mean(bootres$t[,3], na.rm = TRUE) * 10 - 10
  )
}

# Build summary
div_summary_df <- purrr::map_dfr(boot_trials, extract_div_summary)

Bootstrap function using glm (generalized linear model)

boot_L1L2_glm = function(original_data, resample_indices){
  dat = original_data[resample_indices, ]
  
  # a statistical test at each timepoint for each group
  test_g1 = dat %>% # t-test for L1 group
    subset(SpeakerType == 1) %>% group_by(TimeMS_adjusted) %>%
    dplyr::summarise(t = summary(glm(FixP ~ AOI, family = binomial))$coefficients["AOITarget", "z value"])

  test_g2 = dat %>% # t-test for L2 group
    subset(SpeakerType == 2) %>% group_by(TimeMS_adjusted) %>%
    dplyr::summarise(t = summary(glm(FixP ~ AOI, family = binomial))$coefficients["AOITarget", "z value"])

  # return a TRUE/FALSE vector of significant positive t-scores
  # (positive means more looks to the target than unrelated)
  t_g1 = test_g1$t > 1.96
  t_g2 = test_g2$t > 1.96
  
  # create empty vectors to store onsets
  onset_g1 = onset_g2 = c()
  
  # find the index of the earliest run of sequential TRUEs; threshold is Nbins
  for (i in 1:(length(t_g2)-Nbins)) {
    onset_g1[i] = sum(t_g1[i:(i+Nbins-1)]) == Nbins
    onset_g2[i] = sum(t_g2[i:(i+Nbins-1)]) == Nbins
  }
  
  # find the difference between onsets
  delta_g1g2 = which(onset_g2)[1] - which(onset_g1)[1]
  
  # print 
  # note: the bootstrap returns the indices of the respective timepoints, not absolute times. 
  # The annotations to the right of each index (e.g. t[,1]) indicate where in the boot object the bootstrapped onset distributions can be found.
  c(delta_g1g2,         # onset difference L1 vs. L2 t[,1]
    which(onset_g1)[1], # onset bin for looks to target L1 t[,2]
    which(onset_g2)[1])  # onset bin for looks to target L2 t[,3]
}

Run the glm bootstrap on the two subsets by determiner type

bootres_L1L2_glm_def = boot::boot(
  data = div.dat.def,  # data set to bootstrap       
  statistic = boot_L1L2_glm,  # bootstrap function      
  strata = div.dat.def$StrataVars, # stratification variable 
  R = Niter)  # number of iterations

bootres_L1L2_glm_pos = boot::boot(
  data = div.dat.pos,  # data set to bootstrap       
  statistic = boot_L1L2_glm,  # bootstrap function      
  strata = div.dat.pos$StrataVars, # stratification variable 
  R = Niter)  # number of iterations

bootres_L1L2_glm_def
bootres_L1L2_glm_pos

Save the glm results

save(bootres_L1L2_glm_def, file="DPA_results.glm.def.rds")
save(bootres_L1L2_glm_pos, file="DPA_results.glm.pos.rds")
Extracting and plotting the divergence points

Load the lmer results

load("DPA_results.lmer.def.rds")
load("DPA_results.lmer.pos.rds")

Load the glm results

load("DPA_results.glm.def.rds")
load("DPA_results.glm.pos.rds")

Define function to extract divergence points and confidence intervals from bootstrapping results

get_divergence_ms <- function(bootres, index) {
  # index = 2 for ASI, 3 for HSS
  mean_bin <- mean(bootres$t[, index], na.rm = TRUE)
  ci_bin <- boot::boot.ci(bootres, index = index, type = "perc")$percent[4:5]
  
  mean_ms <- (mean_bin - 1) * 10
  ci_ms <- (ci_bin - 1) * 10
  
  list(mean = mean_ms, ci = ci_ms)
}

Run the function on all the results

# Definite determiner
def_asi <- get_divergence_ms(bootres_L1L2_lmer_def, index = 2)
def_hss <- get_divergence_ms(bootres_L1L2_lmer_def, index = 3)

# Possessive determiner
pos_asi <- get_divergence_ms(bootres_L1L2_lmer_pos, index = 2)
pos_hss <- get_divergence_ms(bootres_L1L2_lmer_pos, index = 3)

Create dataframe for plotting

div_df <- tibble::tibble(
  Determiner = c("Definite", "Definite", "Possessive", "Possessive"),
  SpeakerType = c("1", "2", "1", "2"),
  TimeMS = c(def_asi$mean, def_hss$mean, pos_asi$mean, pos_hss$mean),
  CI_low = c(def_asi$ci[1], def_hss$ci[1], pos_asi$ci[1], pos_hss$ci[1]),
  CI_high = c(def_asi$ci[2], def_hss$ci[2], pos_asi$ci[2], pos_hss$ci[2])
)

div_df = div_df %>% mutate_at(vars(SpeakerType, Determiner), as.factor)

div_df <- div_df %>%
  mutate(label = paste0(TimeMS, " ms"))

Add divergence points and their confidence intervals to the two-variable plots

fix.plot.twovar +
  geom_point(data = div_df,
             aes(x = TimeMS, y = 0.48),
             shape = 18, size = 4, color = "black") +
  geom_errorbarh(data = div_df,
                 aes(xmin = CI_low, xmax = CI_high, y = 0.48),
                 height = 0.02, color = "black", linewidth = 0.6) +
  geom_text(data = div_df,
          aes(x = TimeMS - 50, y = 0.55, label = label), 
          size = 10, fontface="bold")
## Warning: Removed 1124 rows containing missing values or values outside the scale range
## (`geom_line()`).

Save the plots

ggsave("plots/fixation_plot_twovar_divpts.png", width = 20, height = 15, dpi = 300)
## Warning: Removed 1124 rows containing missing values or values outside the scale range
## (`geom_line()`).

4. Group comparison

We can compute a p-value for the group comparison by creating a bootstrap distribution of the null hypothesis. To do this, we will first pool the original data from both groups, randomly assign group labels, and then estimate a difference in divergence point. We will repeat this many times to obtain a distribution of divergence point that could be expected under the null hypothesis. We will obtain the p-value by calculating the proportion of samples from this null distribution that are larger than the observed difference in divergence point in the empirical data.

Below, we will calculate a p-value for the group comparison (ASI vs. HSS group). We will first define a bootstrap function.

Define the bootstrap function for group comparison (only lmer so far)
boot_L1L2_pval = function(original_data, resample_indices){
  dat_resample = original_data[resample_indices, ] # resample the data
  
  dat = dat_resample %>% 
    group_by(Participant, AOI, TimeMS_adjusted) %>% 
    transform(SpeakerType=sample(SpeakerType,replace=F)) %>% # randomly assign group labels 
    ungroup() #%>% 
  
  # a statistical test at each timepoint for each group
  test_g1 = dat %>% # t-test for L1 group
    subset(SpeakerType == 1) %>% group_by(TimeMS_adjusted) %>%
    dplyr::summarise(t = summary(lmer(FixP ~ AOI + (1|Participant)))$coefficients["AOITarget", "t value"])

  test_g2 = dat %>% # t-test for L2 group
    subset(SpeakerType == 2) %>% group_by(TimeMS_adjusted) %>%
    dplyr::summarise(t = summary(lmer(FixP ~ AOI + (1|Participant)))$coefficients["AOITarget", "t value"])
  
  # return a TRUE/FALSE vector of significant positive t-scores  
  # (positive means more looks to the target than unrelated)
  t_g1 = test_g1$t > 1.96
  t_g2 = test_g2$t > 1.96
  
  # create empty vectors to store onsets
  onset_g1 = onset_g2 = c()
  
  # find the index of the earliest run of 4 sequential TRUEs 
  for (i in 1:(length(t_g2)-Nbins)) { 
    onset_g1[i] = sum(t_g1[i:(i+Nbins-1)]) == Nbins
    onset_g2[i] = sum(t_g2[i:(i+Nbins-1)]) == Nbins
  }
  
  # find the difference between onsets
  delta_g1g2 = which(onset_g2)[1] - which(onset_g1)[1]
  
  # print 
  delta_g1g2  # onset difference L1 vs. L2 t[,1]
}
Run the lmer bootstrap on the two subsets by determiner type
bootres_L1L2_pval_def = boot::boot(
  data = div.dat.def,  # data set to bootstrap       
  statistic = boot_L1L2_pval,  # bootstrap function      
  strata = div.dat.def$StrataVars, # stratification variable 
  R = Niter)  # number of iterations        

bootres_L1L2_pval_pos = boot::boot(
  data = div.dat.pos,  # data set to bootstrap       
  statistic = boot_L1L2_pval,  # bootstrap function      
  strata = div.dat.pos$StrataVars, # stratification variable 
  R = Niter)  # number of iterations   

bootres_L1L2_pval_def
bootres_L1L2_pval_pos
Save the group comparison results
save(bootres_L1L2_pval, file="DPA_results.pval.def.rds")
save(bootres_L1L2_pval, file="DPA_results.pval.pos.rds")
If we already have saved results, we can load them.
load("DPA_results.pval.def.rds")
load("DPA_results.pval.pos.rds")
Compute the p-values
pval.def <- round(mean(bootres_L1L2_pval_def$t[,1]>=bootres_L1L2_lmer_def$t0[1], na.rm=T), 5)        
pval.pos <- round(mean(bootres_L1L2_pval_pos$t[,1]>=bootres_L1L2_lmer_pos$t0[1], na.rm=T), 5) 

pval.def
pval.pos